# Load the raw export; empty cells and the literal "NA" are read as missing.
initial_data <- read.csv("Data_Econ_index.csv", na.strings = c("", "NA"))

# Discard the first two rows of the imported table.
initial_data <- initial_data[-(1:2), ]

# Normalise bare "Yerevan" to "Yerevan, Armenia"; the second rule undoes the
# doubled suffix on entries that already carried the country. A named vector
# makes str_replace_all() apply the rules one after another, in order.
initial_data[["primary.location"]] <- str_replace_all(
  initial_data[["primary.location"]],
  c(
    "Yerevan" = "Yerevan, Armenia",
    "Yerevan, Armenia, Armenia" = "Yerevan, Armenia"
  )
)

# Spell out "N/A" funding entries as "Unknown".
initial_data[["funding"]] <- str_replace_all(
  initial_data[["funding"]], "N/A", "Unknown"
)

# Founder counts: map "N/A" (with or without a leading space) to "Unknown"
# and strip the "(?)" annotation from the uncertain "4(?)" entries.
initial_data[["X..of.founders"]] <- str_replace_all(
  initial_data[["X..of.founders"]],
  c(
    " N/A" = "Unknown",
    "N/A" = "Unknown",
    "4\\(\\?\\)" = "4"
  )
)



# Turn missing values in the categorical columns into an explicit "Unknown"
# factor level, then drop the government-support date range column.
data_unknowns <- initial_data %>%
  mutate(
    primary.location = fct_explicit_na(primary.location, na_level = "Unknown"),
    accelerator.incubator = fct_explicit_na(accelerator.incubator, na_level = "Unknown"),
    current.stage = fct_explicit_na(current.stage, na_level = "Unknown"),
    funding = fct_explicit_na(funding, na_level = "Unknown"),
    date.published = fct_explicit_na(date.published, na_level = "Unknown"),
    X..of.founders = fct_explicit_na(X..of.founders, na_level = "Unknown")
  ) %>%
  select(-date.range.for.government.support)

# Extract a year from each element of a character vector.
#
# Args:
#   string: character vector to search.
#
# Returns:
#   A numeric vector with one entry per element of `string`: the first run of
#   four consecutive digits converted to a number, or NA when the element
#   contains no such run (including NA input).
yearExtract <- function(string) {
  matches <- regmatches(string, regexec("[0-9]{4}", string))
  # vapply() (rather than sapply()) guarantees a numeric vector even when the
  # input is empty or when no element contains a year.
  vapply(
    matches,
    function(m) {
      if (length(m) > 0) as.numeric(m[[1]]) else NA_real_
    },
    numeric(1)
  )
}

# Changing date published data to be only in years format

# Reduce each free-form publication date to its four-digit year (NA when no
# year can be found).
data_unknowns$date.published <- yearExtract(as.character(data_unknowns$date.published))

yrs <- data_unknowns$date.published
# Anchor each year to 1 January before converting to Date. Parsing with
# format = "%Y" alone lets strptime fill in the *current* month and day, so
# the result depended on the day the script was run (e.g. on 29 February a
# non-leap year is not a valid date). Entries without a year stay NA.
yr <- as.Date(sprintf("%d-01-01", yrs), format = "%Y-%m-%d")
data_unknowns$date.published <- year(yr)


# Changing NAs to be Unknown
# NOTE(review): assigning a string here coerces a numeric year column to
# character, so from this point on the years are stored as text.
data_unknowns$date.published[is.na(data_unknowns$date.published)] <- 'Unknown'
# Making the date as a counted observation in my df
# For every row, look up how often that row's date.published value occurs.
# The looked-up table is attached under the `count` tag; transform() expands
# it into several `count.*` columns (TODO confirm the exact names).
data_unknowns2 <- transform(data_unknowns, count = table(date.published)[date.published])

# Removing unknown dates and grouping by variables of interest
# The grouping key is the combination of startup stage and publication year.
# NOTE(review): the result stays grouped; no ungroup() follows.
walk <- data_unknowns2 %>%
  filter(date.published != "Unknown") %>%
  group_by(interaction(current.stage, date.published))

# Grouping again
# Attach, for every row, the number of rows sharing its stage/year
# combination. The code below relies on the generated column names:
# `count.Var1` (used by distinct()) and `count.Freq.1` (used by the
# streamgraph) -- presumably the ".1" suffix arises because a `count.Freq`
# column already exists from the earlier transform(); verify if the
# pipeline above changes.
walk2 <- transform(walk, count = table(interaction(current.stage, date.published))[interaction(current.stage, date.published)])

# Keeping only distinct variables
# Keep one row per stage/year combination (first occurrence), with all columns.
walk2 <- distinct(walk2, count.Var1, .keep_all = TRUE)

# Streamgraph of startup counts, keyed by current.stage, valued by
# count.Freq.1 and laid out along date.published.
# Known issue: the labels are not showing up as intended yet.
walk2 %>%
  streamgraph("current.stage", "count.Freq.1", "date.published") %>%
  sg_fill_manual(c("#ffa500", "blue", "purple", "red", "#00ff00", "red")) %>%
  sg_legend(show = TRUE, label = "Phase of Startup") %>%
  sg_axis_x(1, "year", "%Y") %>%
  sg_title(title = "Phases of Startups in Armenia 2016-2020")
# Stray rendered output that was pasted into the script as bare text (which
# is not valid R and stopped the file from parsing); kept here as a comment:
# Phases of Startups in Armenia 2016-2020
# AUA Epic
# one_cofounder_AUAepic <- data_unknowns2 %>%
#   filter(X..of.founders == 1) %>%
#   filter(accelerator.incubator == "AUA EPIC")
# 
# two_cofounder_AUAepic <- data_unknowns2 %>%
#   filter(X..of.founders == 2) %>%
#   filter(accelerator.incubator == "AUA EPIC")
# 
# three_cofounder_AUAepic <- data_unknowns2 %>%
#   filter(X..of.founders == 3) %>%
#   filter(accelerator.incubator == "AUA EPIC")
# 
# four_cofounder_AUAepic <- data_unknowns2 %>%
#   filter(X..of.founders == 4) %>%
#   filter(accelerator.incubator == "AUA EPIC")
# 
# five_up_cofounder_AUAepic <- data_unknowns2 %>%
#  filter(accelerator.incubator == "AUA EPIC") %>%
#  filter(X..of.founders == 5 | X..of.founders == 6)

# 
# unknown_cofounder_AUAepic <- data_unknowns2 %>%
#   filter(X..of.founders == "Unknown") %>%
#   filter(accelerator.incubator == "AUA EPIC")

#################

# STEP EIF
# one_cofounder_STEP <- data_unknowns2 %>%
#   filter(X..of.founders == 1) %>%
#   filter(accelerator.incubator == "STEP EIF")
# 
# two_cofounder_STEP <- data_unknowns2 %>%
#   filter(X..of.founders == 2) %>%
#   filter(accelerator.incubator == "STEP EIF")
# 
# three_cofounder_STEP <- data_unknowns2 %>%
#   filter(X..of.founders == 3) %>%
#   filter(accelerator.incubator == "STEP EIF")
# 
# four_cofounder_STEP <- data_unknowns2 %>%
#   filter(X..of.founders == 4) %>%
#   filter(accelerator.incubator == "STEP EIF")
# 
# five_up_cofounder_STEP <- data_unknowns2 %>%
#   filter(X..of.founders == 5 | X..of.founders == 6) %>%
#   filter(accelerator.incubator == "STEP EIF")
# 
# unknown_cofounder_STEP <- data_unknowns2 %>%
#   filter(X..of.founders == "Unknown") %>%
#   filter(accelerator.incubator == "STEP EIF")
#################

# # House AI
# one_cofounder_House <- data_unknowns2 %>%
#   filter(X..of.founders == 1) %>%
#   filter(accelerator.incubator == "Hero House AI Incubator")
# 
# two_cofounder_House <- data_unknowns2 %>%
#   filter(X..of.founders == 2) %>%
#   filter(accelerator.incubator == "Hero House AI Incubator")
# 
# three_cofounder_House <- data_unknowns2 %>%
#   filter(X..of.founders == 3) %>%
#   filter(accelerator.incubator == "Hero House AI Incubator")
# 
# four_cofounder_House <- data_unknowns2 %>%
#   filter(X..of.founders == 4) %>%
#   filter(accelerator.incubator == "Hero House AI Incubator")
# 
# five_up_cofounder_House <- data_unknowns2 %>%
#   filter(X..of.founders == 5 | X..of.founders == 6) %>%
#   filter(accelerator.incubator == "Hero House AI Incubator")
# 
# unknown_cofounder_House <- data_unknowns2 %>%
#   filter(X..of.founders == "Unknown") %>%
#   filter(accelerator.incubator == "Hero House AI Incubator")

##################

# House AI
# one_cofounder_House <- data_unknowns2 %>%
#   filter(X..of.founders == 1) %>%
#   filter(accelerator.incubator == "Hero House AI Incubator")
# 
# two_cofounder_House <- data_unknowns2 %>%
#   filter(X..of.founders == 2) %>%
#   filter(accelerator.incubator == "Hero House AI Incubator")
# 
# three_cofounder_House <- data_unknowns2 %>%
#   filter(X..of.founders == 3) %>%
#   filter(accelerator.incubator == "Hero House AI Incubator")
# 
# four_cofounder_House <- data_unknowns2 %>%
#   filter(X..of.founders == 4) %>%
#   filter(accelerator.incubator == "Hero House AI Incubator")
# 
# five_up_cofounder_House <- data_unknowns2 %>%
#   filter(X..of.founders == 5 | X..of.founders == 6) %>%
#   filter(accelerator.incubator == "Hero House AI Incubator")

# unknown_cofounder_House <- data_unknowns2 %>%
#   filter(X..of.founders == "Unknown") %>%
#   filter(accelerator.incubator == "Hero House AI Incubator")

##################

# # Unknown Incubator
# one_cofounder_unknown <- data_unknowns2 %>%
#   filter(X..of.founders == 1) %>%
#   filter(accelerator.incubator == "Unknown")
# 
# two_cofounder_unknown <- data_unknowns2 %>%
#   filter(X..of.founders == 2) %>%
#   filter(accelerator.incubator == "Unknown")
# 
# three_cofounder_unknown <- data_unknowns2 %>%
#   filter(X..of.founders == 3) %>%
#   filter(accelerator.incubator == "Unknown")
# 
# four_cofounder_unknown <- data_unknowns2 %>%
#   filter(X..of.founders == 4) %>%
#   filter(accelerator.incubator == "Unknown")
# 
# five_up_cofounder_unknown <- data_unknowns2 %>%
#   filter(X..of.founders == 5 | X..of.founders == 6) %>%
#   filter(accelerator.incubator == "Unknown")
# 
# unknown_cofounder_unknown <- data_unknowns2 %>%
#   filter(X..of.founders == "Unknown") %>%
#   filter(accelerator.incubator == "Unknown")


###########
# 
# # Armenia Startup Academy
# one_cofounder_Start_Acad <- data_unknowns2 %>%
#   filter(X..of.founders == 1) %>%
#   filter(accelerator.incubator == "Armenia Startup Academy")
# 
# two_cofounder_Start_Acad <- data_unknowns2 %>%
#   filter(X..of.founders == 2) %>%
#   filter(accelerator.incubator == "Armenia Startup Academy")
# 
# three_cofounder_Start_Acad <- data_unknowns2 %>%
#   filter(X..of.founders == 3) %>%
#   filter(accelerator.incubator == "Armenia Startup Academy")
# 
# four_cofounder_Start_Acad <- data_unknowns2 %>%
#   filter(X..of.founders == 4) %>%
#   filter(accelerator.incubator == "Armenia Startup Academy")
# 
# five_up_cofounder_Start_Acad <- data_unknowns2 %>%
#   filter(X..of.founders == 5 | X..of.founders == 6) %>%
#   filter(accelerator.incubator == "Armenia Startup Academy")
# 
# unknown_cofounder_Start_Acad <- data_unknowns2 %>%
#   filter(X..of.founders == "Unknown") %>%
#   filter(accelerator.incubator == "Armenia Startup Academy")
# 


# Sankey diagram linking each incubator/accelerator (red nodes, indices 0-4)
# to the founder-count buckets (blue nodes, indices 5-10). Node indices are
# zero-based positions in `label`.
sankey_startups <- plot_ly(
  type = "sankey",
  orientation = "h",
  node = list(
    label = c(
      "Armenia Startup Academy", "AUA Epic", "STEP EIF", "Hero House AI",
      "Unknown Incubator/Accelerator",
      "1 Founder", "2 Cofounders", "3 Cofounders", "4 Cofounders",
      "5 + Cofounders", "Unknown Cofounders"
    ),
    color = c(
      rep("red", 5),
      "dodgerblue", "cornflowerblue", "blue", "mediumblue", "midnightblue",
      "skyblue"
    ),
    pad = 15,
    thickness = 20,
    line = list(color = "black", width = 0.5)
  ),
  link = list(
    # Six links per source node, in the order 1, 2, 3, 4, 0 ...
    source = rep(c(1, 2, 3, 4, 0), each = 6),
    # ... each fanning out to the six founder-count nodes.
    target = rep(c(5, 6, 7, 8, 9, 10), times = 5),
    # Hard-coded link weights, one row per source node.
    value = c(
      6, 8, 10, 9, 10, 9,   # source 1
      10, 6, 0, 0, 0, 29,   # source 2
      0, 0, 0, 0, 0, 7,     # source 3
      2, 6, 4, 3, 0, 3,     # source 4
      0, 0, 0, 0, 0, 65     # source 0
    )
  )
) %>%
  layout(
    title = "Number of Cofounders Based on Incubator",
    font = list(size = 10)
  )

# Print the widget.
sankey_startups